﻿using JiebaNet.Analyser;
using JiebaNet.Segmenter;
using JiebaNet.Segmenter.Common;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Windows.Forms;
using WordCloudSharp;

namespace 文本分析程序demo
{
    public partial class Form1 : Form
    {
        /// <summary>
        /// Creates the main form and runs the designer-generated <c>InitializeComponent</c>.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
        }
        // Working copy of the input text. Handlers assign it from InputRichBox.Text before use,
        // and FiterSomething reads it while removing stop words.
        string InputStr = "";// input text
        // Words removed by FiterSomething when it is called with nums == 2.
        string[] stop_word = new string[] { "虽", "虽然", "然", "而", "然而", "不过", "却", "是", "但是", "但", "的" };// stop-word list
        /// <summary>
        /// Removes every occurrence of the word typed in <c>DelWordText</c> from the input text
        /// and shows the result in the output box together with the elapsed time.
        /// </summary>
        /// <param name="sender">Control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void DelWordBtn_Click(object sender, EventArgs e)
        {
            string str = DelWordText.Text; // word to remove
            InputStr = InputRichBox.Text;  // current input text
            if (str == string.Empty)
            {
                return; // nothing to remove
            }

            // Ordinal search so this check agrees with string.Replace below, which is always
            // ordinal; the culture-sensitive IndexOf(string) can report a match Replace never touches.
            if (InputStr.IndexOf(str, StringComparison.Ordinal) < 0)
            {
                MessageBox.Show("当前词语已经不存在在输入文本中","提示");
                return;
            }

            // Stopwatch is monotonic and high-resolution, unlike DateTime.Now.
            var watch = System.Diagnostics.Stopwatch.StartNew();
            OutPutRichBox.Text = InputStr.Replace(str, "");
            watch.Stop();
            TimetBox.Text = "本次统计耗时:" + watch.ElapsedMilliseconds + "ms";
        }

        /// <summary>
        /// Counts the non-overlapping occurrences of the word typed in <c>DelWordText</c>
        /// inside the input text and shows the count and the elapsed time.
        /// </summary>
        /// <param name="sender">Control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void CountWords_Click(object sender, EventArgs e)
        {
            string str = DelWordText.Text; // word to count
            InputStr = InputRichBox.Text;  // current input text
            if (str == string.Empty)
            {
                return; // nothing to count
            }

            var watch = System.Diagnostics.Stopwatch.StartNew();

            // Scan forward from the end of each match instead of deleting matches from the text.
            // Deleting could splice the neighbours into a new, false match ("aabb" / "ab" counted 2),
            // rescanned from the start every time, and left InputStr mutilated.
            int count = 0;
            int index = InputStr.IndexOf(str, StringComparison.Ordinal);
            while (index >= 0)
            {
                count++;
                // index + str.Length never exceeds InputStr.Length, so the start index is always valid.
                index = InputStr.IndexOf(str, index + str.Length, StringComparison.Ordinal);
            }
            watch.Stop();

            if (count == 0)
            {
                MessageBox.Show("当前词语不存在在输入文本中", "提示");
                return;
            }

            OutPutRichBox.Text = str + " "+"出现次数："+count;
            TimetBox.Text = "本次统计耗时:" + watch.ElapsedMilliseconds + "ms";
        }

        /// <summary>
        /// Segments the input text and lists word frequencies, most frequent first.
        /// When <c>CountNums</c> holds a number it is passed to <c>MostCommon</c> as the limit;
        /// when it is empty all words are listed. Punctuation and stop words are filtered
        /// first if the corresponding check boxes are ticked.
        /// </summary>
        /// <param name="sender">Control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void CountAllWord_Click(object sender, EventArgs e)
        {
            InputStr = InputRichBox.Text; // current input text
            if (InputStr == string.Empty)
            {
                return;
            }

            // Validate the optional limit up front with TryParse. The previous blanket catch
            // around the whole counting loop reported *any* failure as "not a number".
            string limitText = CountNums.Text;
            bool limited = limitText != string.Empty;
            int nums = 0; // number of words to show
            if (limited && !int.TryParse(limitText, out nums))
            {
                MessageBox.Show("输入的不是数字或数字超出范围。", "提示");
                return;
            }

            var watch = System.Diagnostics.Stopwatch.StartNew();
            if (FuhaoCheckBox.Checked)
            {
                InputStr = FiterSomething(InputStr, 1); // strip punctuation
            }
            if (StopWordCheckBox.Checked)
            {
                InputStr = FiterSomething(InputStr, 2); // strip stop words
            }

            var segmenter = new JiebaSegmenter();
            var freqs = new Counter<string>(segmenter.Cut(InputStr));

            IEnumerable<KeyValuePair<string, int>> ranked;
            if (limited)
            {
                ranked = freqs.MostCommon(nums);
            }
            else
            {
                ranked = freqs.MostCommon();
            }

            // StringBuilder avoids re-copying the whole result for every appended line.
            var res = new StringBuilder();
            foreach (var pair in ranked)
            {
                res.Append(pair.Key).Append(" : ").Append(pair.Value).Append('\n');
            }
            watch.Stop();

            OutPutRichBox.Text = res.ToString();
            TimetBox.Text = "本次统计耗时:" + watch.ElapsedMilliseconds + "ms";
        }

        /// <summary>
        /// Shows the help text for the word-frequency options.
        /// </summary>
        private void label3_Click(object sender, EventArgs e)
        {
            const string helpText = "输入多少就会显示多少个词语。\n不输入数字默认显示全部词语的词频。\n越高频的词语排越前。" +
                "\n勾选了去除标点符号将不统计标点的频数。\n勾选去除停用词则会去除停用词词数。";
            const string caption = "帮助说明";
            MessageBox.Show(helpText, caption, MessageBoxButtons.OK);
        }
        /// <summary>
        /// Shows the help text for the "important sentence" feature.
        /// </summary>
        private void label6_Click(object sender, EventArgs e)
        {
            const string helpText = "根据最高词频过滤出重要句子，将输出所有包含最高频词的句子。";
            const string caption = "帮助说明";
            MessageBox.Show(helpText, caption, MessageBoxButtons.OK);
        }
        /// <summary>
        /// Opens a new <c>Minform</c> window (non-modal) from the "about us" menu item.
        /// </summary>
        private void 关于我们ToolStripMenuItem_Click(object sender, EventArgs e) => new Minform().Show();

        /// <summary>
        /// Copy button: focuses the output box, selects all of its text and copies it.
        /// </summary>
        private void CopyBtn_Click(object sender, EventArgs e)
        {
            var output = OutPutRichBox;
            output.Focus();     // take focus before changing the selection
            output.SelectAll();
            output.Copy();
        }

        /// <summary>
        /// Replaces the input box content with the current output text and reports
        /// how long the copy took.
        /// </summary>
        private void OutputToInputBtn_Click(object sender, EventArgs e)
        {
            long startTicks = DateTime.Now.Ticks; // 100 ns units
            InputRichBox.Text = OutPutRichBox.Text;
            long elapsedTicks = DateTime.Now.Ticks - startTicks;

            // TimeSpan.TicksPerMillisecond is 10000, i.e. ticks -> ms.
            int elapsedMs = (int)(elapsedTicks / TimeSpan.TicksPerMillisecond);
            TimetBox.Text = "本次统计耗时:" + elapsedMs + "ms";
        }

        /// <summary>
        /// Replaces every occurrence of the word in <c>OldWordText</c> with the text in
        /// <c>NewWordText</c> and writes the result to the output box. Does nothing when
        /// either the search word or the input text is empty.
        /// </summary>
        /// <param name="sender">Control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void ReplaceBtn_Click(object sender, EventArgs e)
        {
            // Read each control once instead of re-querying it for every check.
            string source = InputRichBox.Text;
            string oldWord = OldWordText.Text;
            if (oldWord == string.Empty || source == string.Empty)
            {
                return;
            }

            // Ordinal search so this check agrees with string.Replace, which is always ordinal.
            if (source.IndexOf(oldWord, StringComparison.Ordinal) < 0)
            {
                MessageBox.Show("词语不存在在输入文本中", "提示");
                return;
            }

            // Time only the replacement itself, with a monotonic clock.
            var watch = System.Diagnostics.Stopwatch.StartNew();
            OutPutRichBox.Text = source.Replace(oldWord, NewWordText.Text);
            watch.Stop();
            TimetBox.Text = "本次统计耗时:" + watch.ElapsedMilliseconds + "ms";
        }

        /// <summary>
        /// Empties the output box.
        /// </summary>
        private void ClearTextBox_Click(object sender, EventArgs e) => OutPutRichBox.Clear();

        /// <summary>
        /// Finds the most frequent word of the filtered input (punctuation and stop words
        /// removed) and outputs every sentence of the original input that contains it.
        /// The original text is shown unchanged when no keyword or no matching sentence is found.
        /// </summary>
        /// <param name="sender">Control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void SignImportantSentence_Click(object sender, EventArgs e)
        {
            // Read the control once instead of re-querying it for every check below.
            string original = InputRichBox.Text;
            if (original == string.Empty)
            {
                return;
            }

            var watch = System.Diagnostics.Stopwatch.StartNew();

            InputStr = original;
            InputStr = FiterSomething(InputStr, 1); // strip punctuation
            InputStr = FiterSomething(InputStr, 2); // strip stop words

            // Pick the word with the highest count in the filtered text.
            string mostInportant = "";
            if (InputStr != string.Empty)
            {
                var segmenter = new JiebaSegmenter();
                var freqs = new Counter<string>(segmenter.Cut(InputStr));
                int max = 0;
                foreach (var pair in freqs.MostCommon())
                {
                    if (pair.Value > max)
                    {
                        max = pair.Value;
                        mostInportant = pair.Key;
                    }
                }
            }

            // Sentences come from the ORIGINAL text. The first separator present wins,
            // in this priority order; with none present, split on whitespace.
            char[] candidates = { '。', ',', ';', ' ', '.' };
            string[] sentences = null;
            foreach (char separator in candidates)
            {
                if (original.IndexOf(separator) >= 0)
                {
                    sentences = original.Split(separator);
                    break;
                }
            }
            if (sentences == null)
            {
                sentences = original.Split();
            }

            var res = new StringBuilder();
            // An empty keyword would "match" every fragment (IndexOf("") is 0), so skip matching
            // and fall back to the original text instead.
            if (mostInportant != string.Empty)
            {
                foreach (string sentence in sentences)
                {
                    if (sentence.IndexOf(mostInportant, StringComparison.Ordinal) > -1)
                    {
                        res.Append(sentence).Append("。").Append("\n");
                    }
                }
            }

            OutPutRichBox.Text = res.Length == 0 ? original : res.ToString();
            watch.Stop();
            TimetBox.Text = "本次统计耗时:" + watch.ElapsedMilliseconds + "ms";
        }

        /// <summary>
        /// Filters punctuation or stop words out of <paramref name="str"/>.
        /// </summary>
        /// <param name="str">Text to filter.</param>
        /// <param name="nums">
        /// 1 removes whitespace, punctuation and a few extra symbols; 2 removes every entry of
        /// <c>stop_word</c>; any other value returns the text unchanged.
        /// </param>
        /// <returns>The filtered text.</returns>
        private string FiterSomething(string str,int nums)
        {
            if (nums == 1)
            {
                // Strip whitespace, Unicode punctuation and the listed symbols.
                return Regex.Replace(str, "[\\s\\p{P}\n\r=<>$>+￥^]", "");
            }

            if (nums == 2)
            {
                foreach (string word in stop_word)
                {
                    // Accumulate on str. Restarting from the InputStr field on every pass
                    // ignored the argument and kept only the last stop word's removal.
                    str = str.Replace(word, "");
                }
            }

            return str;
        }

        /// <summary>
        /// Extracts keywords from the given text together with their weights,
        /// using a <c>TfidfExtractor</c>.
        /// </summary>
        /// <param name="text">Text to analyse.</param>
        /// <returns>The extracted word/weight pairs as an array.</returns>
        static WordWeightPair[] ExtractTagsWithWeight(string text)
        {
            const int keywordCount = 50; // count argument handed to the extractor
            return new TfidfExtractor()
                .ExtractTagsWithWeight(text, keywordCount)
                .ToArray();
        }
        /// <summary>
        /// Segments the text, counts word frequencies and returns the frequency of each
        /// keyword in <paramref name="wordWeightAry"/>, in the same order.
        /// (Original author's note: segmentation uses the default accurate mode with the HMM model.)
        /// </summary>
        /// <param name="text">Text to segment.</param>
        /// <param name="wordWeightAry">Keywords whose frequencies are looked up.</param>
        /// <returns>One word/frequency pair per entry of <paramref name="wordWeightAry"/>.</returns>
        static KeyValuePair<string, int>[] Counter(string text, WordWeightPair[] wordWeightAry)
        {
            // Frequencies of every segment in the text.
            var freqs = new Counter<string>(new JiebaSegmenter().Cut(text));

            // Project each keyword to (word, frequency); the array has the same length as the input.
            return wordWeightAry
                .Select(item => new KeyValuePair<string, int>(item.Word, freqs[item.Word]))
                .ToArray();
        }
        /// <summary>
        /// Draws a 1000x1000 word cloud from the given word/frequency pairs, saves it as
        /// <c>result.jpg</c> (relative path) and reports the result in <paramref name="rich"/>.
        /// </summary>
        /// <param name="countAry">Words and their frequencies.</param>
        /// <param name="rich">Text box whose content is replaced by the completion message.</param>
        static void CreateWordCloud(KeyValuePair<string, int>[] countAry,RichTextBox rich)
        {
            string resultPath = "result.jpg";

            // No mask image is used. An earlier commented-out variant built the cloud from
            // "mask.jpg" via the constructor's mask argument.
            var wordCloud = new WordCloud(1000, 1000,false, null,-1,1,null, false);
            var words = countAry.Select(it => it.Key).ToList();
            var freqs = countAry.Select(it => it.Value).ToList();

            // Dispose the drawn image (it holds GDI+ resources), and name the encoder explicitly:
            // Save(path) alone ignores the ".jpg" extension and does not write JPEG data.
            using (var result = wordCloud.Draw(words, freqs))
            {
                result.Save(resultPath, System.Drawing.Imaging.ImageFormat.Jpeg);
            }

            rich.Text=("图片生成完成，已保存在程序目录下，保存图片：" + resultPath);
        }

        /// <summary>
        /// Generates a word cloud from the input text after user confirmation. When
        /// <c>WordsWeightcheckBox</c> is ticked, the keyword frequencies are also written to
        /// <c>词频统计结果.txt</c> as comma-separated lines.
        /// </summary>
        /// <param name="sender">Control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void CloudBtn_Click(object sender, EventArgs e)
        {
            string text = InputRichBox.Text;
            if (text == string.Empty)
            {
                return;
            }

            DialogResult dialog = MessageBox.Show("是否生成词云？时间可能较长，需要等待十秒左右。", "提示", MessageBoxButtons.OKCancel);
            if (dialog != DialogResult.OK)
            {
                return; // user cancelled
            }

            TimetBox.Text = "正在生成……请等待";
            // Paint the status now: the work below runs on the UI thread, so without this
            // the message would not appear until generation has already finished.
            TimetBox.Refresh();

            var watch = System.Diagnostics.Stopwatch.StartNew();
            InputStr = text;
            var wordWeight = ExtractTagsWithWeight(InputStr); // keywords with weights
            var wordFreqs = Counter(InputStr, wordWeight);    // frequency of each keyword

            string savedNotice = null;
            if (WordsWeightcheckBox.Checked)
            {
                var sbr = new StringBuilder();
                sbr.Append("词语").Append(",").Append("词频").AppendLine(",");
                foreach (var pair in wordFreqs)
                {
                    sbr.Append(pair.Key).Append(",").Append(pair.Value).AppendLine(",");
                }
                string filename = "词频统计结果.txt";
                File.WriteAllText(filename, sbr.ToString(), Encoding.UTF8);
                savedNotice = "词频统计完成,已保存在程序目录下，保存文件：" + filename;
            }

            CreateWordCloud(wordFreqs,OutPutRichBox); // replaces the output box text

            // Append the notice only now. Written before CreateWordCloud it was immediately
            // overwritten and the user never saw it.
            if (savedNotice != null)
            {
                OutPutRichBox.Text += "\n" + savedNotice;
            }

            watch.Stop();
            TimetBox.Text = "本次统计耗时:" + watch.ElapsedMilliseconds + "ms";
        }

        /// <summary>
        /// Saves the current output text to <c>输出结果.txt</c> (UTF-8, relative path)
        /// after the user confirms.
        /// </summary>
        private void SaveBtn_Click(object sender, EventArgs e)
        {
            string output = OutPutRichBox.Text;
            if (output.Length == 0)
            {
                return; // nothing to save
            }

            bool confirmed = MessageBox.Show("是否保存当前输出结果，保存文件将为txt文件。", "提示", MessageBoxButtons.OKCancel) == DialogResult.OK;
            if (confirmed)
            {
                const string filename = "输出结果.txt";
                File.WriteAllText(filename, output, Encoding.UTF8);
                MessageBox.Show("\n结果已保存至程序目录下，保存文件：" + filename,"提示");
            }
        }

        /// <summary>
        /// Context-menu copy: places the input box's selected text on the clipboard.
        /// </summary>
        private void Copy_Click(object sender, EventArgs e) => Clipboard.SetDataObject(InputRichBox.SelectedText);

        /// <summary>
        /// Context-menu paste: inserts the clipboard's plain text into the input box,
        /// replacing the current selection if there is one, otherwise inserting at the caret.
        /// </summary>
        /// <param name="sender">Control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void Paste_Click(object sender, EventArgs e)
        {
            IDataObject iData = Clipboard.GetDataObject();
            // The clipboard may be empty (null) or hold no text.
            if (iData == null || !iData.GetDataPresent(DataFormats.Text))
            {
                return;
            }

            string strPaste = iData.GetData(DataFormats.Text) as string;
            if (strPaste == null)
            {
                return;
            }

            // Assigning SelectedText replaces only the selection, or inserts at the caret when
            // nothing is selected. The previous Text.Replace(SelectedText, ...) rewrote EVERY
            // occurrence of the selected string in the document and reset the caret.
            InputRichBox.SelectedText = strPaste;
        }

    }
}
